namespace java com.twitter.simclusters_v2.thriftjava
namespace py gen.twitter.simclusters_v2.identifier
#@namespace scala com.twitter.simclusters_v2.thriftscala
#@namespace strato com.twitter.simclusters_v2

include "com/twitter/simclusters_v2/online_store.thrift"

/**
  * The uniform type for a SimClusters Embeddings.
  * Each embeddings have the uniform underlying storage.
  * Warning: Every EmbeddingType should map to one and only one InternalId.
  **/
enum EmbeddingType {
  // Reserve 001 - 99 for Tweet embeddings
	FavBasedTweet = 1, // Deprecated
	FollowBasedTweet = 2, // Deprecated
	LogFavBasedTweet = 3, // Production Version
	FavBasedTwistlyTweet = 10, // Deprecated
	LogFavBasedTwistlyTweet = 11, // Deprecated
	LogFavLongestL2EmbeddingTweet = 12, // Production Version

  // Tweet embeddings generated from non-fav events
  // Naming convention: {Event}{Score}BasedTweet
  // {Event}: The interaction event we use to build the tweet embeddings
  // {Score}: The score from user InterestedIn embeddings
  VideoPlayBack50LogFavBasedTweet = 21,
  RetweetLogFavBasedTweet = 22,
  ReplyLogFavBasedTweet = 23,
  PushOpenLogFavBasedTweet = 24,

  // [Experimental] Offline generated FavThroughRate-based Tweet Embedding
  Pop1000RankDecay11Tweet = 30,
  Pop10000RankDecay11Tweet = 31,
  OonPop1000RankDecayTweet = 32,

  // [Experimental] Offline generated production-like LogFavScore-based Tweet Embedding
  OfflineGeneratedLogFavBasedTweet = 40,

  // Reserve 51-59 for Ads Embedding
  LogFavBasedAdsTweet = 51, // Experimental embedding for ads tweet candidate
  LogFavClickBasedAdsTweet = 52, // Experimental embedding for ads tweet candidate

  // Reserve 60-69 for Evergreen content
  LogFavBasedEvergreenTweet = 60,
  LogFavBasedRealTimeTweet = 65,

	// Reserve 101 to 149 for Semantic Core Entity embeddings
  FavBasedSematicCoreEntity = 101, // Deprecated
  FollowBasedSematicCoreEntity = 102, // Deprecated
  FavBasedHashtagEntity = 103, // Deprecated
  FollowBasedHashtagEntity = 104, // Deprecated
  ProducerFavBasedSemanticCoreEntity = 105, // Deprecated
  ProducerFollowBasedSemanticCoreEntity = 106,// Deprecated
  FavBasedLocaleSemanticCoreEntity = 107, // Deprecated
  FollowBasedLocaleSemanticCoreEntity = 108, // Deprecated
  LogFavBasedLocaleSemanticCoreEntity = 109, // Deprecated
  LanguageFilteredProducerFavBasedSemanticCoreEntity = 110, // Deprecated
  LanguageFilteredFavBasedLocaleSemanticCoreEntity = 111, // Deprecated
  FavTfgTopic = 112, // TFG topic embedding built from fav-based user interestedIn
  LogFavTfgTopic = 113, // TFG topic embedding built from logfav-based user interestedIn
  FavInferredLanguageTfgTopic = 114, // TFG topic embedding built using inferred consumed languages
  FavBasedKgoApeTopic = 115, // topic embedding using fav-based aggregatable producer embedding of KGO seed accounts.
  LogFavBasedKgoApeTopic = 116, // topic embedding using log fav-based aggregatable producer embedding of KGO seed accounts.
  FavBasedOnboardingApeTopic = 117, // topic embedding using fav-based aggregatable producer embedding of onboarding seed accounts.
  LogFavBasedOnboardingApeTopic = 118, // topic embedding using log fav-based aggregatable producer embedding of onboarding seed accounts.
  LogFavApeBasedMuseTopic = 119, // Deprecated
  LogFavApeBasedMuseTopicExperiment = 120 // Deprecated

  // Reserved 201 - 299 for Producer embeddings (KnownFor)
  FavBasedProducer = 201
  FollowBasedProducer = 202
  AggregatableFavBasedProducer = 203 // fav-based aggregatable producer embedding.
  AggregatableLogFavBasedProducer = 204 // logfav-based aggregatable producer embedding.
  RelaxedAggregatableLogFavBasedProducer = 205 // logfav-based aggregatable producer embedding.
  AggregatableFollowBasedProducer = 206 // follow-based aggregatable producer embedding.
  KnownFor = 300

  // Reserved 301 - 399 for User InterestedIn embeddings
  FavBasedUserInterestedIn = 301
  FollowBasedUserInterestedIn = 302
  LogFavBasedUserInterestedIn = 303
  RecentFollowBasedUserInterestedIn = 304 // interested-in embedding based on aggregating producer embeddings of recent follows
  FilteredUserInterestedIn = 305 // interested-in embedding used by twistly read path
  LogFavBasedUserInterestedInFromAPE = 306
  FollowBasedUserInterestedInFromAPE = 307
  TwiceUserInterestedIn = 308 // interested-in multi-embedding based on clustering producer embeddings of neighbors
  UnfilteredUserInterestedIn = 309
  UserNextInterestedIn = 310 // next interested-in embedding generated from BeT

  // Denser User InterestedIn, generated by Producer embeddings.
  FavBasedUserInterestedInFromPE = 311
  FollowBasedUserInterestedInFromPE = 312
  LogFavBasedUserInterestedInFromPE = 313
  FilteredUserInterestedInFromPE = 314 // interested-in embedding used by twistly read path

  // [Experimental] Denser User InterestedIn, generated by aggregating IIAPE embedding from AddressBook
  LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE = 320
  LogFavBasedUserInterestedAverageAddressBookFromIIAPE = 321
  LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE = 322
  LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE = 323
  LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE = 324
  LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE = 325

  //Reserved 401 - 500 for Space embedding
  FavBasedApeSpace = 401 // DEPRECATED
  LogFavBasedListenerSpace = 402 // DEPRECATED
  LogFavBasedAPESpeakerSpace = 403 // DEPRECATED
  LogFavBasedUserInterestedInListenerSpace = 404 // DEPRECATED

  // Experimental, internal-only IDs
  ExperimentalThirtyDayRecentFollowBasedUserInterestedIn = 10000 // Like RecentFollowBasedUserInterestedIn, except limited to last 30 days
	ExperimentalLogFavLongestL2EmbeddingTweet = 10001 // DEPRECATED
}(persisted = 'true', hasPersonalData = 'false')

/**
  * The uniform type for a SimClusters MultiEmbeddings.
  * Warning: Every MultiEmbeddingType should map to one and only one InternalId.
  **/
enum MultiEmbeddingType {
  // Reserved 0-99 for Tweet based MultiEmbedding

  // Reserved 100 - 199 for Topic based MultiEmbedding
  LogFavApeBasedMuseTopic = 100 // Deprecated
  LogFavApeBasedMuseTopicExperiment = 101 // Deprecated

  // Reserved 301 - 399 for User InterestedIn embeddings
  TwiceUserInterestedIn = 301 // interested-in multi-embedding based on clustering producer embeddings of neighbors
}(persisted = 'true', hasPersonalData = 'true')

// Deprecated. Please use TopicId for future cases.
struct LocaleEntityId {
  1: i64 entityId
  2: string language
}(persisted = 'true', hasPersonalData = 'false')

enum EngagementType {
  Favorite = 1,
  Retweet = 2,
}

struct UserEngagedTweetId {
  1: i64 tweetId(personalDataType = 'TweetId')
  2: i64 userId(personalDataType = 'UserId')
  3: EngagementType engagementType(personalDataType = 'EventType')
}(persisted = 'true', hasPersonalData = 'true')

struct TopicId {
  1: i64 entityId (personalDataType = 'SemanticcoreClassification')
  // 2-letter ISO 639-1 language code
  2: optional string language
  // 2-letter ISO 3166-1 alpha-2 country code
  3: optional string country
}(persisted = 'true', hasPersonalData = 'false')

struct TopicSubId {
  1: i64 entityId (personalDataType = 'SemanticcoreClassification')
  // 2-letter ISO 639-1 language code
  2: optional string language
  // 2-letter ISO 3166-1 alpha-2 country code
  3: optional string country
  4: i32 subId
}(persisted = 'true', hasPersonalData = 'true')

// Will be used for testing purposes in DDG 15536, 15534
struct UserWithLanguageId {
  1: required i64 userId(personalDataType = 'UserId')
  2: optional string langCode(personalDataType = 'InferredLanguage')
}(persisted = 'true', hasPersonalData = 'true')

/**
  * The internal identifier type.
  * Need to add ordering in [[com.twitter.simclusters_v2.common.SimClustersEmbeddingId]]
  * when adding a new type.
  **/
union InternalId {
  1: i64 tweetId(personalDataType = 'TweetId')
  2: i64 userId(personalDataType = 'UserId')
  3: i64 entityId(personalDataType = 'SemanticcoreClassification')
  4: string hashtag(personalDataType = 'PublicTweetEntitiesAndMetadata')
  5: i32 clusterId
  6: LocaleEntityId localeEntityId(personalDataType = 'SemanticcoreClassification')
  7: UserEngagedTweetId userEngagedTweetId
  8: TopicId topicId
  9: TopicSubId topicSubId
  10: string spaceId
  11: UserWithLanguageId userWithLanguageId
}(persisted = 'true', hasPersonalData = 'true')

/**
  * A uniform identifier type for all kinds of SimClusters based embeddings.
  **/
struct SimClustersEmbeddingId {
  1: required EmbeddingType embeddingType
  2: required online_store.ModelVersion modelVersion
  3: required InternalId internalId
}(persisted = 'true', hasPersonalData = 'true')

/**
  * A uniform identifier type for multiple SimClusters embeddings
  **/
struct SimClustersMultiEmbeddingId {
  1: required MultiEmbeddingType embeddingType
  2: required online_store.ModelVersion modelVersion
  3: required InternalId internalId
}(persisted = 'true', hasPersonalData = 'true')
